import pandas as pd
import numpy as np

# Path of the Starbucks worldwide store dataset (relative path, resolved
# against the current working directory).
filePath = "csvFile/starbucks_store_worldwide.csv"
df = pd.read_csv(filePath)
# DataFrame.info() writes its summary straight to stdout and returns None,
# so wrapping it in print() would emit a stray "None" line after the report.
df.info()

# print(df.groupby(by="Country"))
# Group by country
# groupData = df.groupby(by="Country")
# print(groupData)

# for i in groupData:
#     print(i)
# Count the number of stores in each country
# countryCount = groupData["Brand"].count()
# print(countryCount["US"])
# print(countryCount["CN"])

# Count the number of stores in each state/province (restricted to Country == "CN")
# chinaData = df[df["Country"] == "CN"]
# groupChina = chinaData.groupby(by="State/Province")["Brand"].count()
# print(groupChina)

# Hierarchical (multi-level) index: group on both Country and State/Province.
# Selecting df["Brand"] gives a Series; selecting df[["Brand"]] gives a
# one-column DataFrame, which is why the result below is a DataFrame.
groupData1 = (
    df.groupby(by=["Country", "State/Province"])[["Brand"]]
    .count()
)
# print(groupData1)

# groupData2 = df.groupby(by=["Country","State/Province"])["Brand"].count()
# print(groupData2)
#
# groupData3 = df.groupby(by=["Country","State/Province"]).count()["Brand"]
# print(groupData3)
# All three approaches above yield the same counts (groupData2 and groupData3
# as a Series, groupData1 as a one-column DataFrame).

# Show the (Country, State/Province) MultiIndex of the grouped result.
print(groupData1.index)



